# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html

import codecs
import json
import pymysql

from scrapy.pipelines.images import ImagesPipeline
from twisted.enterprise import adbapi


class ReptilianPipeline(object):
    """Pass-through pipeline that leaves every item untouched."""

    def process_item(self, item, spider):
        """Print ``1`` as a trace marker and hand *item* back unchanged."""
        print(1)
        return item


class JsonWithEncodingPipeline(object):
    """Write every item to ``article.json`` as one JSON object per line."""

    def __init__(self):
        # codecs.open with an explicit encoding keeps the output UTF-8
        # regardless of the platform's default encoding.
        self.file = codecs.open('article.json', 'w', encoding='utf-8')

    def process_item(self, item, spider):
        """Serialize *item* as a single JSON line and return it unchanged."""
        # ensure_ascii=False writes non-ASCII text (e.g. Chinese) verbatim
        # instead of escaping it to \uXXXX sequences.
        lines = json.dumps(dict(item), ensure_ascii=False) + "\n"
        self.file.write(lines)
        return item

    def close_spider(self, spider):
        """Close the output file.

        ``close_spider`` is the hook Scrapy invokes on item pipelines when
        the spider finishes; the previous ``spider_closed`` name was never
        called by the framework, so the file was left open.
        """
        self.file.close()

    def spider_closed(self, spider):
        """Backward-compatible alias for :meth:`close_spider`."""
        self.close_spider(spider)


# Store items in the database (synchronous, blocking inserts)
class MysqlPipeline(object):
    """Insert each item into the ``jobbole`` table over a blocking connection."""

    # Values are bound through %s placeholders so the driver escapes them;
    # scraped text containing quotes can no longer break or inject SQL.
    _INSERT_SQL = """
        INSERT INTO `jobbole`(`title`, `url`, `img_url`, `front_image_url`, `tags`, `url_object_id`, `content`, `create_date`)
        VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
    """

    def __init__(self):
        self.conn = pymysql.connect(host='127.0.0.1', user='root', passwd='', db='reptilian', port=3306,
                                    charset='utf8')
        self.cur = self.conn.cursor(cursor=pymysql.cursors.DictCursor)

    def process_item(self, item, spider):
        """Insert *item* as one row, commit, and return the item.

        Returning the item is required so that pipelines configured after
        this one still receive it.
        """
        tags = item['tags']
        if isinstance(tags, (list, tuple)):
            # A sequence of tags is stored as one comma-separated string.
            tags = ",".join(tags)

        params = (
            item['title'],
            item['url'],
            "".join(item['img_url']),
            item.get('front_image_url', ''),
            tags,
            item['url_object_id'],
            "",  # the content column is always written as an empty string
            item['create_date'],
        )
        self.cur.execute(self._INSERT_SQL, params)
        self.conn.commit()
        return item

    def close_spider(self, spider):
        """Release the cursor and connection.

        ``close_spider`` is the hook Scrapy invokes on item pipelines when
        the spider finishes.
        """
        self.cur.close()
        self.conn.close()

    def spider_closed(self, spider):
        """Backward-compatible alias for :meth:`close_spider`."""
        self.close_spider(spider)


# Store items in MySQL asynchronously, using a Twisted connection pool
class MysqlWistedPipline(object):
    """Insert items into the ``jobbole`` table through a Twisted adbapi pool."""

    # Values are bound through %s placeholders so the driver escapes them;
    # scraped text containing quotes can no longer break or inject SQL.
    _INSERT_SQL = """
        INSERT INTO `jobbole`(`title`, `url`, `img_url`, `front_image_url`, `tags`, `url_object_id`, `content`, `create_date`)
        VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
    """

    @classmethod
    def from_settings(cls, settings):
        """Build the pipeline from the ``MYSQL_*`` entries of *settings*."""
        dbconf = {
            'host': settings['MYSQL_HOST'],
            'db': settings['MYSQL_NAME'],
            'user': settings['MYSQL_USER'],
            'passwd': settings['MYSQL_PASS'],
            'port': settings['MYSQL_PORT'],
            'charset': 'utf8',
            'cursorclass': pymysql.cursors.DictCursor
        }

        dbpoll = adbapi.ConnectionPool("pymysql", **dbconf)
        return cls(dbpoll)

    def __init__(self, dbpoll):
        self.dbpoll = dbpoll

    def process_item(self, item, spider):
        """Schedule the insert on the pool and return *item* immediately."""
        # runInteraction hands do_insert to the pool and returns a Deferred,
        # so the insert does not block this call.
        query = self.dbpoll.runInteraction(self.do_insert, item)
        query.addErrback(self.handle_error, item, spider)  # report failures
        return item

    def do_insert(self, cursor, item):
        """Execute the parameterized INSERT for *item* on *cursor*."""
        tags = item['tags']
        if isinstance(tags, (list, tuple)):
            # A sequence of tags is stored as one comma-separated string.
            tags = ",".join(tags)

        params = (
            item['title'],
            item['url'],
            "".join(item['img_url']),
            # Absent when no image was downloaded; fall back to an empty
            # string instead of raising KeyError.
            item.get('front_image_url', ''),
            tags,
            item['url_object_id'],
            "",  # the content column is always written as an empty string
            item['create_date'],
        )
        cursor.execute(self._INSERT_SQL, params)

    def handle_error(self, failure, item, spider):
        """Errback for the insert Deferred: print the failure."""
        print(failure)


# Custom image download and post-processing
class ArticleImagePipeline(ImagesPipeline):
    """Image pipeline that records the stored image path on the item."""

    # get_media_requests is not overridden; the ImagesPipeline version is used.

    def item_completed(self, results, item, info):
        """Copy the path of a successfully downloaded image onto *item*.

        *results* is iterated as ``(ok, value)`` pairs. Only pairs whose
        ``ok`` flag is true are read, because for a failed download ``value``
        is not a dict with a ``'path'`` key and indexing it would raise.
        If several downloads succeeded, the last one wins. Items without an
        ``img_url`` field are returned untouched.
        """
        if 'img_url' not in item:
            return item
        for ok, value in results:
            if ok:
                item['front_image_url'] = value['path']
        return item
